home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Language/OS - Multiplatform Resource Library
/
LANGUAGE OS.iso
/
a_utils
/
_archvrs
/
unix
/
zoo21src.lha
/
zoo
/
lzc.asm
< prev
next >
Wrap
Assembly Source File
|
1992-11-20
|
12KB
|
589 lines
title Lempel-Ziv Compressor
; $Source$
; $Id$
;Derived from Tom Pfau's public domain assembly code.
;The contents of this file are hereby released to the public domain.
; -- Rahul Dhesi 1988/08/24
UNBUF_IO equ 1 ;use unbuffered I/O
public _lzc
include asmconst.ai
include macros.ai
check_gap equ 4000 ;Check ratio every so many bits
scale_limit equ 32000 ;scale down if bitsout > scale_limit
rat_thresh equ 0ffffh ;don't reset if rat > rat_thresh
;Hash table entry
hash_rec struc
first dw ? ; First entry with this value
next dw ? ; Next entry along chain
char db ? ; Suffix char
hash_rec ends
extrn docrc:near ;Procedure for block CRC in lzd.asm
ifdef UNBUF_IO
extrn _read:near
extrn _blockwrite:near
else
extrn _zooread:near
extrn _zoowrite:near
endif
;Declare Segments
_text segment byte public 'code'
_text ends
dgroup group _data
assume ds:dgroup,es:dgroup
_data segment word public 'data'
extrn _out_buf_adr:word ;address of C output buffer
extrn _in_buf_adr:word ;address of C input buffer
extrn memflag:byte ;got memory? flag
save_sp dw ?
bytesin dw ? ;count of bytes read
bitsout dw ? ;count of bits written
ratio dw ? ;recent ratio
ratflag db ? ;flag to remind us to check ratio
bit_interval dw ? ;interval at which to test ratio
input_handle dw ?
output_handle dw ?
hash_seg dw ?
prefix_code dw ?
free_code dw ?
max_code dw ?
nbits dw ?
k db ?
bit_offset dw ?
input_offset dw 0
input_size dw 0
_data ends
memory segment para public 'memory'
hash label hash_rec
memory ends
add_code macro
local ac1,ac2,ac3
mov bx,free_code ;Get code to use
push ds ;point to hash table
mov ds,hash_seg
or di,di ;First use of this prefix?
jz ac1 ;zero means yes
mov [si].next,bx ;point last use to new entry
jmp short ac2
ac1: mov [si].first,bx ;Point first use to new entry
ac2: cmp bx,maxmax ;Have we reached code limit?
je ac3 ;equal means yes, just return
;call index ;get address of new entry
call_index ;macro for speed
mov [si].first,-1 ;initialize pointers
mov [si].next,-1
mov [si].char,al ;save suffix char
inc es:free_code ;adjust next code
ac3: pop ds ;restore seg reg
endm
read_char macro ;Macro for speed
local m$1,m$2,m$3,m$4
inc [bytesin] ;Maintain input byte count for ratio
mov di,input_offset ;Anything left in buffer?
cmp di,input_size
jb m$1 ;less means yes
mov cx,inbufsiz
call doread ;read block
cmp ax,-1
jnz m$3
jmp IO_err ;input error
m$3:
mov dx,_in_buf_adr ;for docrc
call docrc
or ax,ax ;Anything left?
jz m$2 ;zero means no, finished
mov input_size,ax ;Save bytes read
mov input_offset,0 ;Point to beginning of buffer
mov di,0
m$1:
mov si,_in_buf_adr
add si,di
lodsb ;Read it in
inc input_offset ;Adjust pointer
clc ;Success
jmp short m$4
m$2: stc ;Nothing left
m$4:
endm
;Start writing code
_text segment
assume cs:_text,ds:dgroup,es:dgroup,ss:nothing
_lzc proc near
push bp ;Standard C entry code
mov bp,sp
push di
push si
push ds ;Save ds to be sure
mov bx,ds
mov es,bx
;Get two parameters, both integers, that are input file handle and
;output file handle
mov ax,[bp+4]
mov [input_handle],ax
mov ax,[bp+6]
mov [output_handle],ax
call compress ;Compress file
;Status received back in AX
pop ds
pop si ;Standard C return code
pop di
mov sp,bp
pop bp
ret
_lzc endp
compress proc near
mov [save_sp],sp ;Save SP in case of error return
;Initialize variables for serial re-entrancy
mov [bit_offset],0
mov [input_offset],0
mov [input_size],0
test memflag,0ffH ;Memory allocated?
jnz gotmem ;If allocated, continue
malloc <((maxmax * 5) / 16 + 20)> ;allocate it
jnc here1
jmp MALLOC_err1
here1:
mov hash_seg,ax ;Save segment address of mem block
mov memflag,0ffh ;Set flag to remind us later
gotmem:
l1: call init_table ;Initialize the table and some vars
mov ax,clear ;Write a clear code
call write_code
;call read_char ;Read first char
read_char ;macro for speed
l4:
;new code to check compression ratio
test [ratflag],0FFH ;Time to check ratio?
jz rd$1 ;Skip checking if zero
call check_ratio
rd$1:
xor ah,ah ;Turn char into code
l4a: mov prefix_code,ax ;Set prefix code
;call read_char ;Read next char
read_char ;macro for speed
jc l17 ;Carry means eof
mov k,al ;Save char in k
mov bx,prefix_code ;Get prefix code
call lookup_code ;See if this pair in table
jnc l4a ;nc means yes, new code in ax
;call add_code ;Add pair to table
add_code ;Macro for speed
push bx ;Save new code
mov ax,prefix_code ;Write old prefix code
call write_code
pop bx
mov al,k ;Get last char
cmp bx,max_code ;Exceed code size?
jnb l4$
jmp l4
l4$:
cmp nbits,maxbits ;Currently less than maxbits?
jb l14 ;yes
mov ax,clear ;Write a clear code
call write_code
call init_table ;Reinit table
mov al,k ;get last char
jmp l4 ;Start over
l14: inc nbits ;Increase number of bits
shl max_code,1 ;Double max code size
jmp l4 ;Get next char
l17: mov ax,prefix_code ;Write last code
call write_code
mov ax,eof ;Write eof code
call write_code
mov ax,bit_offset ;Make sure buffer is flushed to file
cmp ax,0
je OK_ret
mov dx,ax ;dx <- ax
shr ax,1
shr ax,1
shr ax,1 ;ax <- ax div 8
and dx,07 ;dx <- ax mod 8
;If extra bits, make sure they get
je l17a ;written
inc ax
l17a: call flush
OK_ret:
xor ax,ax ;Normal return -- compressed
ret
IO_err:
mov ax,2 ;I/O error return
mov sp,[save_sp] ;Restore stack pointer
ret
MALLOC_err1: ;hash table alloc error
mov ax,1 ;Malloc error return
mov sp,[save_sp] ;Restore stack pointer
ret
compress endp
init_table proc near
mov [bytesin],0 ;Input byte count
mov [bitsout],0 ;Output bit count
mov [ratio],0
mov [ratflag],0
mov [bit_interval],check_gap
mov nbits,9 ;Set code size to 9
mov max_code,512 ;Set max code to 512
push es ;Save seg reg
mov es,hash_seg ;Address hash table
mov ax,-1 ;Unused flag
mov cx,640 ;Clear first 256 entries
mov di,offset hash ;Point to first entry
rep stosw ;Clear it out
pop es ;Restore seg reg
mov free_code,first_free ;Set next code to use
ret ;done
init_table endp
write_code proc near
push ax ;Save code
mov cx,nbits
add [bitsout],cx ;Maintain output bit count for ratio
sub [bit_interval],cx
jg rd$2 ;OK if not reached interval
mov [ratflag],1 ;else set flag -- check ratio soon
rd$2:
mov ax,bit_offset ;Get bit offset
mov cx,nbits ;Adjust bit offset by code size
add bit_offset,cx
mov dx,ax ;dx <- ax
shr ax,1
shr ax,1
shr ax,1 ;ax <- ax div 8
and dx,07 ;dx <- ax mod 8
;Now ax contains byte offset and
;dx contains bit offset within that byte (I think)
cmp ax,outbufsiz-4 ;Approaching end of buffer?
jb wc1 ;less means no
call flush ;Write the buffer
push dx ;dx contains offset within byte
add dx,nbits ;adjust by code size
mov bit_offset,dx ;new bit offset
pop dx ;restore dx
;ax is an index into output buffer. Next, ax <- address of buffer[ax]
add ax,[_out_buf_adr]
mov si,ax ;put in si
mov al,byte ptr [si] ;move byte to first position
;put value of al into first byte of output buffer
push bx
mov bx,[_out_buf_adr]
mov [bx],al
pop bx
xor ax,ax ;Byte offset of zero
wc1: add ax,[_out_buf_adr] ;Point into buffer
mov di,ax ;Destination
pop ax ;Restore code
mov cx,dx ;offset within byte
xor dx,dx ;dx will catch bits rotated out
jcxz wc3 ;If offset in byte is zero, skip shift
wc2: shl ax,1 ;Rotate code
rcl dx,1
loop wc2
or al,byte ptr [di] ;Grab bits currently in buffer
wc3: stosw ;Save data
mov al,dl ;Grab extra bits
stosb ;and save
ret
write_code endp
flush proc near
push ax ;Save all registers
push bx ;AX contains number of bytes to write
push cx
push dx
push si ;may not be necessary to save si & di
push di
push ax ;save byte count
push ax ;byte count
push _out_buf_adr ;buffer address
push output_handle ;zoofile
ifdef UNBUF_IO
call _blockwrite
else
call _zoowrite
endif
add sp,6
pop cx ;recover byte count
cmp ax,cx
jz here2
jmp IO_err ;I/O error
here2:
pop di
pop si
pop dx
pop cx
pop bx
pop ax
ret
flush endp
lookup_code proc near
push ds ;Save seg reg
mov ds,hash_seg ;point to hash table
;call index ;convert code to address
call_index ;macro for speed
mov di,0 ;flag
mov bx,[si].first
cmp bx,-1 ;Has this code been used?
je gc4 ;equal means no
inc di ;set flag
gc2:
;call index ;convert code to address
call_index ;macro for speed
cmp [si].char,al ;is char the same?
jne gc3 ;ne means no
clc ;success
mov ax,bx ;put found code in ax
pop ds ;restore seg reg
ret ;done
gc3:
mov bx,[si].next
cmp bx,-1 ;More left with this prefix?
je gc4 ;equal means no
jmp gc2 ;try again
gc4: stc ;not found
pop ds ;restore seg reg
ret ;done
lookup_code endp
comment #
index proc near
mov si,bx ;si = bx * 5 (5 byte hash entries)
shl si,1 ;si = bx * 2 * 2 + bx
shl si,1
add si,bx
ret
index endp
# end comment
check_ratio proc near
push ax
; mov dl,'*' ;'*' printed means checking ratio
; call sendout
;Getting ready to check ratios. If bitsout is over scale_limit,
;then we scale down both bitsout and bytesin by a factor
;of 4. This will avoid overflow.
mov cx,[bitsout]
cmp cx,scale_limit
jb scale_ok
mov cl,2
shr [bytesin],cl
shr [bitsout],cl
scale_ok:
;Note MASM bug: "mov ah,high [bytesin]" and
;"mov al,low [bytesin]" don't work.
mov ah,byte ptr [bytesin]
mov dl,byte ptr [bytesin+1]
sub al,al
sub dh,dh ;dx:ax = 8 * bitsin = 256 * bytesin
mov cx,[bitsout] ;cx <- bitsout
or cx,cx ;Division by zero?
jnz candivide ;No -- go ahead divide
mov ax,0FFFFH ;yes -- assume max poss value
jmp short divided
candivide:
;Calculate cx as (bytesin * 256) / bitsout = bitsin * 8 / bitsout
div cx ;ax <- rat <- dx:ax / cx
shl ax,1
shl ax,1 ;rat <- 4 * bytes_in / bytes_out
divided:
;Enter here with ax = rat = bitsin / bitsout.
; call print_data ;print info for debugging
;If rat > rat_thresh then ratio is good; do not reset table
; cmp ax,rat_thresh
; ja ratdone
;Compare rat against ratio
mov bx,ax ;save rat in bx
cmp ax,[ratio] ;cmp rat,ratio
jb ratless ;trouble if ratio is now smaller
mov ax,[ratio]
call mul7 ;ax <- 7 * ratio
add ax,bx ;ax = 7 * ratio + rat
shr ax,1
shr ax,1
shr ax,1 ;ax = (7 * ratio + rat) / 8
mov [ratio],ax ;ratio = (7 * ratio + rat) / 8
jmp short ratdone
ratless: ;ratio is going down, so...
mov [bytesin],0
mov [bitsout],0
; mov dl,'#' ;'#' printed means table reset
; call sendout
mov ax,clear ;Write a clear code
call write_code
call init_table ;Reinit table
ratdone:
mov [ratflag],0
mov [bit_interval],check_gap
pop ax
ret
check_ratio endp
comment #
sendout proc near ;char in dl send to console
push ax
mov ah,02
int 21H
pop ax
ret
sendout endp
# end comment
mul7 proc near ;multiply ax by 7
push dx
mov dx,7
mul dx ;dx:ax <- 7 * ax
pop dx
ret
mul7 endp
comment #
mul3 proc near ;multiply ax by 3
push dx
mov dx,3
mul dx ;dx:ax <- 3 * ax
pop dx
ret
mul3 endp
# end comment
comment #
mul1_125 proc near ;multiply ax by 1.125
push bx
mov bx,ax
shr bx,1
shr bx,1
shr bx,1 ;bx = n / 8
add ax,bx ;ax <- n + n / 8
pop bx
ret
mul1_125 endp
# end comment
comment #
print_data proc near
;Debugging -- print bytesin, bitsout, rat, and ratio
push ax
push bx
push cx
push dx
push ax ;print rat
call _prtint
add sp,2
push [ratio] ;print ratio
call _prtint
add sp,2
push [bytesin]
call _prtint
add sp,2
push [bitsout]
call _prtint
add sp,2
pop dx
pop cx
pop bx
pop ax
ret
print_data endp
# end comment
;doread reads cx characters and stores them in input buffer
;return value from zooread is returned in ax
doread proc near ;reads block
push bx
push cx
push dx
push si
push di
push cx ;byte count
push _in_buf_adr ;buffer address
push input_handle ;zoofile
ifdef UNBUF_IO
call _read
else
call _zooread
endif
add sp,6
pop di
pop si
pop dx
pop cx
pop bx
ret
doread endp
_text ends
end